library(plyr)
library(ggplot2)
library(rstudioapi)
circular.mean <- function(X) {
x <- mean(cos(as.numeric(names(X))) * X)
y <- mean(sin(as.numeric(names(X))) * X)
n <- sum(X)
theta.mean <- atan2(y, x)
if(theta.mean < 0)
theta.mean = theta.mean + 2 * pi
theta.var = sqrt(x^2 + y^2) / n
return(c(theta.mean, theta.var))
}
resample.means <- function(X, Y, I) {
n <- length(Y)
Y.means <- numeric(n)
X.index = 0
for(i in 1:n) {
Y.means[i] <- mean(X[I[(X.index + 1):(X.index + Y[i])]])
X.index = X.index + Y[i]
}
names(Y.means) <- names(Y)
return(Y.means)
}
path = dirname(rstudioapi::getSourceEditorContext()$path)
ds.og.file = paste(path, "/Accidents_Trafic_Catalunya.csv", sep = '')
ds.file = paste(path, "/ATC.csv", sep = '')
update = F
if(update || !file.exists(ds.file)) {
dataset <- read.csv(ds.og.file)
for(i in 1:nrow(dataset)) {
di <- dataset[i,]
if(di$grupHor != 'Nit') {
if(di$hor < 600) {
if(di$hor < 60)
di$hor = di$hor * 100
else
di$hor = di$hor * 10
}
} else {
if(di$hor < 60 || (220 <= di$hor && di$hor < 240)) {
if(di$hor < 6 || (21 < di$hor && di$hor < 24))
di$hor = di$hor * 100
else if(0 < di$hor%%10 && di$hor%%10 < 5)
di$hor = di$hor * 10
}
}
dataset$hor[i] = di$hor%/%100
dataset$min[i] = di$hor%%100
}
dataset$data = as.Date(dataset$dat, '%d/%m/%Y')
dataset$diaSem = factor(weekdays(dataset$data))
dataset$mes = factor(months(dataset$data))
dataset$C_VELOCITAT_VIA[dataset$C_VELOCITAT_VIA == 999] = NA
dataset$C_VELOCITAT_VIA[dataset$C_VELOCITAT_VIA == 99] = NA
dataset$C_VELOCITAT_VIA[dataset$C_VELOCITAT_VIA == 0] = NA
write.csv(dataset, file = ds.file)
} else {
dataset <- read.csv(ds.file)
dataset$data = as.Date(dataset$dat, '%d/%m/%Y')
}
nom.hor <- format(ISOdate(0,1,1, 0:23),"%Hh")
color.grupHor <- c(rep('darkblue', 6), rep('yellow', 8), rep('orange', 8), rep('darkblue', 2))
val.hor <- 0:24
d.hora.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$hor, mean)[0:24],
morts = tapply(dataset$F_MORTS, dataset$hor, mean)[0:24],
n = tapply(dataset$F_VICTIMES, dataset$hor, length)[0:24],
nom = nom.hor,
color = color.grupHor)
ggplot(data = d.hora.VM, aes(x = nom, y = n), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.hora.VM$color, color = 'black') +
ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Hora") +
coord_polar()
ggplot(data = d.hora.VM, aes(x = nom, y = victimes), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.hora.VM$color, color = 'black') +
ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Hora") +
coord_polar()
ggplot(data = d.hora.VM, aes(x = nom, y = morts), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.hora.VM$color, color = 'black') +
ggtitle("Morts per accident") + ylab("Mitja") + xlab("Hora") +
coord_polar()
d.hora.all <- data.frame(victimes = dataset$F_VICTIMES,
morts = dataset$F_MORTS,
hora = dataset$hor,
theta = dataset$hor * pi / 12)
d.hora.V <- circular.mean(tapply(d.hora.all$victimes, d.hora.all$theta, mean))
d.hora.M <- circular.mean(tapply(d.hora.all$morts, d.hora.all$theta, mean))
d.hora.theta.V <- d.hora.V[1]
d.hora.var.V <- d.hora.V[2]
d.hora.theta.M <- d.hora.M[1]
d.hora.var.M <- d.hora.M[2]
n.rep <- 10000
n.set <- nrow(d.hora.all)
d.hora.rep <- tapply(d.hora.all$hora, d.hora.all$theta, length)
p.hora.theta.V <- numeric(n.rep)
p.hora.var.V <- numeric(n.rep)
p.hora.theta.M <- numeric(n.rep)
p.hora.var.M <- numeric(n.rep)
for(i in 1:n.rep) {
perm <- sample(n.set)
p.hora.V <- circular.mean(resample.means(d.hora.all$victimes, d.hora.rep ,perm))
p.hora.M <- circular.mean(resample.means(d.hora.all$morts, d.hora.rep, perm))
p.hora.theta.V[i] <- p.hora.V[1]
p.hora.var.V[i] <- p.hora.V[2]
p.hora.theta.M[i] <- p.hora.M[1]
p.hora.var.M[i] <- p.hora.M[2]
}
plot.hora.limit.V <- max(c(p.hora.var.V, d.hora.var.V * 1.1))
hist(p.hora.var.V, xlim = c(0, plot.hora.limit.V), main = '"Variancia" de l\'hora per victimes')
p.value.hora.V <- sum(p.hora.var.V >= d.hora.var.V) / n.rep
lines(rep(d.hora.var.V, 2), c(0, n.rep), col = 'red')
plot.hora.limit.M <- max(c(p.hora.var.M, d.hora.var.M * 1.1))
hist(p.hora.var.M, xlim = c(0, plot.hora.limit.M), main = '"Variancia" de l\'hora per morts')
p.value.hora.M <- sum(p.hora.var.M >= d.hora.var.M) / n.rep
lines(rep(d.hora.var.M, 2), c(0, n.rep), col = 'red')
{
show(p.value.hora.V)
show(p.value.hora.M)
}
## [1] 0
## [1] 0
plot.hora.limitX.V <- c(min(p.hora.var.V * sin(p.hora.theta.V)), d.hora.var.V * sin(d.hora.theta.V) * 1.1)
plot.hora.limitY.V <- c(min(p.hora.var.V * cos(p.hora.theta.V)), d.hora.var.V * cos(d.hora.theta.V) * 1.1)
plot(p.hora.var.V * sin(p.hora.theta.V), p.hora.var.V * cos(p.hora.theta.V),
asp = 1, xlim = plot.hora.limitX.V, ylim = plot.hora.limitY.V, col = 'blue', lwd = .1, cex = .3,
xlab = '', ylab = '', main = 'scatter plot de les permutacions per victimes')
points(d.hora.var.V * sin(d.hora.theta.V), d.hora.var.V * cos(d.hora.theta.V),
col = 'red', cex = .5)
plot.hora.limitX.M <- c(min(p.hora.var.M * sin(p.hora.theta.M)), d.hora.var.M * sin(d.hora.theta.M) * 1.1)
plot.hora.limitY.M <- c(min(p.hora.var.M * cos(p.hora.theta.M)), d.hora.var.M * cos(d.hora.theta.M) * 1.1)
plot(p.hora.var.M * sin(p.hora.theta.M), p.hora.var.M * cos(p.hora.theta.M),
asp = 1, xlim = plot.hora.limitX.M, ylim = plot.hora.limitY.M, col = 'blue', lwd = .1, cex = .3,
xlab = '', ylab = '', main = 'scatter plot de les permutacions per morts')
points(d.hora.var.M * sin(d.hora.theta.M), d.hora.var.M * cos(d.hora.theta.M),
col = 'red', cex = .5)
nom.diaSem <- weekdays(as.Date('24/05/2021', '%d/%m/%Y')+0:6)
nom.tipusDia <- c(rep('laborable', 5), rep('fi de setmana', 2))
color.feiner <- c(rep('red', 5), rep('blue', 2))
d.dia.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$diaSem, mean)[nom.diaSem],
morts = tapply(dataset$F_MORTS, dataset$diaSem, mean)[nom.diaSem],
n = tapply(dataset$F_VICTIMES, dataset$diaSem, length)[nom.diaSem],
nom = factor(nom.diaSem, levels = nom.diaSem),
color = color.feiner,
tipus = factor(nom.tipusDia, levels = c('laborable', 'fi de setmana')))
ggplot(data = d.dia.VM, aes(y = n, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = d.dia.VM$color, color = 'black') +
ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Dia de la setmana")
ggplot(data = d.dia.VM, aes(y = victimes, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = d.dia.VM$color, color = 'black') +
ggtitle("Victimes") + ylab("Mitja") + xlab("Dia de la setmana")
ggplot(data = d.dia.VM, aes(y = morts, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = d.dia.VM$color, color = 'black') +
ggtitle("Morts") + ylab("Mitja") + xlab("Dia de la setmana")
# agafar dades respecte el dia de la setmana
d.dia.all <- data.frame(victimes = dataset$F_VICTIMES,
morts = dataset$F_MORTS,
dia = factor(dataset$diaSem, levels = nom.diaSem),
tipus = factor(dataset$diaSem, levels = nom.diaSem))
levels(d.dia.all$tipus) <- nom.tipusDia
# Generem els models per probar
d.dia.fit.lab.V <- lm(d.dia.all$victimes[d.dia.all$tipus == 'laborable']~
factor(d.dia.all$dia[d.dia.all$tipus == 'laborable']))
d.dia.fit.lab.M <- lm(d.dia.all$morts[d.dia.all$tipus == 'laborable']~
factor(d.dia.all$dia[d.dia.all$tipus == 'laborable']))
d.dia.fit.dia6.V <- lm(d.dia.all$victimes[d.dia.all$dia != nom.diaSem[7]]~
factor(d.dia.all$tipus[d.dia.all$dia != nom.diaSem[7]]))
d.dia.fit.dia6.M <- lm(d.dia.all$morts[d.dia.all$dia != nom.diaSem[7]]~
factor(d.dia.all$tipus[d.dia.all$dia != nom.diaSem[7]]))
d.dia.fit.fds.V <- lm(d.dia.all$victimes[d.dia.all$tipus == 'fi de setmana']~
factor(d.dia.all$dia[d.dia.all$tipus == 'fi de setmana']))
d.dia.fit.fds.M <- lm(d.dia.all$morts[d.dia.all$tipus == 'fi de setmana']~
factor(d.dia.all$dia[d.dia.all$tipus == 'fi de setmana']))
# Comparació amb test anova (aov)
{
cat('p-value per victimes comparant dies laborables:',
summary(aov(d.dia.fit.lab.V))[[1]]$`Pr(>F)`[1], '\n')
cat('p-value per morts comparant dies laborables:',
summary(aov(d.dia.fit.lab.M))[[1]]$`Pr(>F)`[1], '\n')
cat('Tots els dies laborables són iguals\n\n')
cat('p-value per victimes comparant dies laborables i dissabte:',
summary(aov(d.dia.fit.dia6.V))[[1]]$`Pr(>F)`[1], '\n')
cat('p-value per morts comparant dies laborables i dissabte:',
summary(aov(d.dia.fit.dia6.M))[[1]]$`Pr(>F)`[1], '\n')
cat('A dissabte el numero per accident augmenta (respecte els laborables)\n\n')
cat('p-value per victimes comparant dissabte i diumenge:',
summary(aov(d.dia.fit.fds.V))[[1]]$`Pr(>F)`[1], '\n')
cat('p-value per morts comparant dies dissabte i diumenge:',
summary(aov(d.dia.fit.fds.M))[[1]]$`Pr(>F)`[1], '\n')
cat('A diumenge el numero de victimes accident augmenta signnificativament pero el de morts no (Respecte dissabte)\n\n')
}
## p-value per victimes comparant dies laborables: 0.4398894
## p-value per morts comparant dies laborables: 0.961798
## Tots els dies laborables són iguals
##
## p-value per victimes comparant dies laborables i dissabte: 2.363101e-23
## p-value per morts comparant dies laborables i dissabte: 0.00154988
## A dissabte el numero per accident augmenta (respecte els laborables)
##
## p-value per victimes comparant dissabte i diumenge: 0.002842418
## p-value per morts comparant dies dissabte i diumenge: 0.2385609
## A diumenge el numero de victimes accident augmenta signnificativament pero el de morts no (Respecte dissabte)
nom.mes <- format(ISOdate(0,1:12,1), '%B')
color.estacio <- c(rep('cyan', 2), rep('green', 3), rep('yellow', 3), rep('orange', 3), 'cyan')
d.mes.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$mes, mean)[nom.mes],
morts = tapply(dataset$F_MORTS, dataset$mes, mean)[nom.mes],
n = tapply(dataset$F_VICTIMES, dataset$mes, length)[nom.mes],
nom = factor(nom.mes, levels = nom.mes),
color = color.estacio)
ggplot(data = d.mes.VM, aes(x = nom, y = n), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.mes.VM$color, color = 'black') +
ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Hora") +
coord_polar()
ggplot(data = d.mes.VM, aes(x = nom, y = victimes), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.mes.VM$color, color = 'black') +
ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Hora") +
coord_polar()
ggplot(data = d.mes.VM, aes(x = nom, y = morts), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.mes.VM$color, color = 'black') +
ggtitle("Morts per accident") + ylab("Mitja") + xlab("Hora") +
coord_polar()
d.mes.all <- data.frame(victimes = dataset$F_VICTIMES,
morts = dataset$F_MORTS,
mes = dataset$mes,
theta = as.numeric(format(dataset$data, "%m")) * pi / 6)
d.mes.V <- circular.mean(tapply(d.mes.all$victimes, d.mes.all$theta, mean))
d.mes.M <- circular.mean(tapply(d.mes.all$morts, d.mes.all$theta, mean))
d.mes.theta.V <- d.mes.V[1]
d.mes.var.V <- d.mes.V[2]
d.mes.theta.M <- d.mes.M[1]
d.mes.var.M <- d.mes.M[2]
n.rep <- 10000
n.set <- nrow(d.mes.all)
d.mes.rep <- tapply(d.mes.all$mes, d.mes.all$theta, length)
p.mes.theta.V <- numeric(n.rep)
p.mes.var.V <- numeric(n.rep)
p.mes.theta.M <- numeric(n.rep)
p.mes.var.M <- numeric(n.rep)
for(i in 1:n.rep) {
perm <- sample(n.set)
p.mes.V <- circular.mean(resample.means(d.mes.all$victimes, d.mes.rep ,perm))
p.mes.M <- circular.mean(resample.means(d.mes.all$morts, d.mes.rep, perm))
p.mes.theta.V[i] <- p.mes.V[1]
p.mes.var.V[i] <- p.mes.V[2]
p.mes.theta.M[i] <- p.mes.M[1]
p.mes.var.M[i] <- p.mes.M[2]
}
plot.mes.limit.V <- max(c(p.mes.var.V, d.mes.var.V * 1.1))
hist(p.mes.var.V, xlim = c(0, plot.mes.limit.V), main = '"Variancia" de l\'mes per victimes')
p.value.mes.V <- sum(p.mes.var.V >= d.mes.var.V) / n.rep
lines(rep(d.mes.var.V, 2), c(0, n.rep), col = 'red')
plot.mes.limit.M <- max(c(p.mes.var.M, d.mes.var.M * 1.1))
hist(p.mes.var.M, xlim = c(0, plot.mes.limit.M), main = '"Variancia" de l\'mes per morts')
p.value.mes.M <- sum(p.mes.var.M >= d.mes.var.M) / n.rep
lines(rep(d.mes.var.M, 2), c(0, n.rep), col = 'red')
{
show(p.value.mes.V)
show(p.value.mes.M)
}
## [1] 0.0679
## [1] 0.4235
plot(p.mes.var.V * sin(p.mes.theta.V), p.mes.var.V * cos(p.mes.theta.V),
asp = 1, col = 'blue', lwd = .1, cex = .3,
xlab = '', ylab = '', main = 'scatter plot de les permutacions per victimes')
points(d.mes.var.V * sin(d.mes.theta.V), d.mes.var.V * cos(d.mes.theta.V),
col = 'red', cex = .5)
plot(p.mes.var.M * sin(p.mes.theta.M), p.mes.var.M * cos(p.mes.theta.M),
asp = 1, col = 'blue', lwd = .1, cex = .3,
xlab = '', ylab = '', main = 'scatter plot de les permutacions per morts')
points(d.mes.var.M * sin(d.mes.theta.M), d.mes.var.M * cos(d.mes.theta.M),
col = 'red', cex = .5)
nom.via <- names(sort(tapply(dataset$F_VICTIMES, dataset$D_TIPUS_VIA, length), decreasing = TRUE))
nom.viaComp <- c('Via urbana', 'Carretera\nconvencional',
'Altres', 'Autovia', 'Autopista',
'Camà rural\nPista forestal')
d.via.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$D_TIPUS_VIA, mean)[nom.via],
morts = tapply(dataset$F_MORTS, dataset$D_TIPUS_VIA, mean)[nom.via],
n = tapply(dataset$F_VICTIMES, dataset$D_TIPUS_VIA, length)[nom.via],
nom = factor(nom.viaComp, levels = nom.viaComp))
ggplot(data = d.via.VM, aes(y = n, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'white', color = 'black') +
ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Tipus de via") +
scale_y_log10()
ggplot(data = d.via.VM, aes(y = victimes, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'white', color = 'black') +
ggtitle("Victimes") + ylab("Mitja") + xlab("Tipus de via")
ggplot(data = d.via.VM, aes(y = morts, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'white', color = 'black') +
ggtitle("Morts") + ylab("Mitja") + xlab("Tipus de via")
d.via.all <- data.frame(victimes = dataset$F_VICTIMES,
morts = dataset$F_MORTS,
via = dataset$D_TIPUS_VIA)
n.rep = 10000
d.via.c.V <- tapply(d.via.all$victimes, d.via.all$via, c)
d.via.c.M <- tapply(d.via.all$morts, d.via.all$via, c)
p.via.mean.V <- list()
p.via.mean.M <- list()
for(via in nom.via) {
p.via.mean.V[[via]] = numeric(n.rep)
p.via.mean.M[[via]] = numeric(n.rep)
}
for(i in 1:n.rep) {
t.via.mean.V <- lapply(lapply(d.via.c.V, sample, replace = T), mean)
t.via.mean.M <- lapply(lapply(d.via.c.M, sample, replace = T), mean)
for(via in nom.via) {
p.via.mean.V[[via]][i] = t.via.mean.V[[via]]
p.via.mean.M[[via]][i] = t.via.mean.M[[via]]
}
}
p.via.VM <- data.frame(victimes = c(unlist(lapply(p.via.mean.V, mean)))[nom.via],
morts = c(unlist(lapply(p.via.mean.M, mean)))[nom.via],
nom = factor(nom.viaComp, levels = nom.viaComp))
for(i in 1:length(nom.via)) {
via = nom.via[i]
p.via.VM$IC.max.V[i] = 2 * p.via.VM$victimes[i] - quantile(p.via.mean.V[[via]], 0.025)
p.via.VM$IC.min.V[i] = 2 * p.via.VM$victimes[i] - quantile(p.via.mean.V[[via]], 0.975)
p.via.VM$IC.max.M[i] = 2 * p.via.VM$morts[i] - quantile(p.via.mean.M[[via]], 0.025)
p.via.VM$IC.min.M[i] = 2 * p.via.VM$morts[i] - quantile(p.via.mean.M[[via]], 0.975)
}
ggplot(data = p.via.VM, aes(y = victimes, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'white', color = 'black') +
geom_errorbar(aes(ymin = IC.min.V, ymax = IC.max.V), width=.2, position=position_dodge(.9)) +
ggtitle("Victimes") + ylab("Mitja") + xlab("Tipus de via")
ggplot(data = p.via.VM, aes(y = morts, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'white', color = 'black') +
geom_errorbar(aes(ymin = IC.min.M, ymax = IC.max.M), width=.2, position=position_dodge(.9)) +
ggtitle("Morts") + ylab("Mitja") + xlab("Tipus de via")
nom.carril <- names(sort(tapply(dataset$F_VICTIMES, dataset$D_CARRIL_ESPECIAL, length), decreasing = TRUE))
nom.carril <- nom.carril[nom.carril != 'Sense Especificar']
nom.carrilComp <- gsub("/", "\n", gsub(" ", "\n", nom.carril))
d.carril.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$D_CARRIL_ESPECIAL, mean)[nom.carril],
morts = tapply(dataset$F_MORTS, dataset$D_CARRIL_ESPECIAL, mean)[nom.carril],
n = tapply(dataset$F_VICTIMES, dataset$D_CARRIL_ESPECIAL, length)[nom.carril],
nom = factor(nom.carrilComp, levels = nom.carrilComp))
ggplot(data = d.carril.VM, aes(y = n, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'darkgreen', color = 'black') +
ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Tipus de carril") +
scale_y_log10()
ggplot(data = d.carril.VM, aes(y = victimes, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'darkgreen', color = 'black') +
ggtitle("Victimes") + ylab("Mitja") + xlab("Tipus de carril")
ggplot(data = d.carril.VM, aes(y = morts, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'darkgreen', color = 'black') +
ggtitle("Morts") + ylab("Mitja") + xlab("Tipus de carril")
d.carril.all <- data.frame(victimes = dataset$F_VICTIMES,
morts = dataset$F_MORTS,
carril = dataset$D_CARRIL_ESPECIAL)
n.rep = 10000
d.carril.c.V <- tapply(d.carril.all$victimes, d.carril.all$carril, c)
d.carril.c.M <- tapply(d.carril.all$morts, d.carril.all$carril, c)
p.carril.mean.V <- list()
p.carril.mean.M <- list()
for(carril in nom.carril) {
p.carril.mean.V[[carril]] = numeric(n.rep)
p.carril.mean.M[[carril]] = numeric(n.rep)
}
for(i in 1:n.rep) {
t.carril.mean.V <- lapply(lapply(d.carril.c.V, sample, replace = T), mean)
t.carril.mean.M <- lapply(lapply(d.carril.c.M, sample, replace = T), mean)
for(carril in nom.carril) {
p.carril.mean.V[[carril]][i] = t.carril.mean.V[[carril]]
p.carril.mean.M[[carril]][i] = t.carril.mean.M[[carril]]
}
}
p.carril.VM <- data.frame(victimes = c(unlist(lapply(p.carril.mean.V, mean)))[nom.carril],
morts = c(unlist(lapply(p.carril.mean.M, mean)))[nom.carril],
nom = factor(nom.carrilComp, levels = nom.carrilComp))
for(i in 1:length(nom.carril)) {
carril = nom.carril[i]
p.carril.VM$IC.max.V[i] = 2 * d.carril.VM$victimes[i] - quantile(p.carril.mean.V[[carril]], 0.025)
p.carril.VM$IC.min.V[i] = max(2 * d.carril.VM$victimes[i] - quantile(p.carril.mean.V[[carril]], 0.975), 0)
p.carril.VM$IC.max.M[i] = 2 * d.carril.VM$morts[i] - quantile(p.carril.mean.M[[carril]], 0.025)
p.carril.VM$IC.min.M[i] = max(2 * d.carril.VM$morts[i] - quantile(p.carril.mean.M[[carril]], 0.975), 0)
}
ggplot(data = p.carril.VM, aes(y = victimes, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'darkgreen', color = 'black') +
geom_errorbar(aes(ymin = IC.min.V, ymax = IC.max.V), width=.2, position=position_dodge(.9)) +
ggtitle("Victimes") + ylab("Mitja") + xlab("Tipus de carril")
ggplot(data = p.carril.VM, aes(y = morts, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'darkgreen', color = 'black') +
geom_errorbar(aes(ymin = IC.min.M, ymax = IC.max.M), width=.2, position=position_dodge(.9)) +
ggtitle("Morts") + ylab("Mitja") + xlab("Tipus de carril")
val.vel <- levels(factor(dataset$C_VELOCITAT_VIA))
val.vel10 <- seq(10, 120, 10)
nom.vel <- paste(val.vel, 'km/h')
d.vel.all <- data.frame(victimes = dataset$F_VICTIMES[!is.na(dataset$C_VELOCITAT_VIA)],
morts = dataset$F_MORTS[!is.na(dataset$C_VELOCITAT_VIA)],
velocitat = dataset$C_VELOCITAT_VIA[!is.na(dataset$C_VELOCITAT_VIA)],
vel10 = factor(dataset$C_VELOCITAT_VIA[!is.na(dataset$C_VELOCITAT_VIA)] %/% 10 * 10, levels = val.vel10))
d.vel.all$MpV100 <- d.vel.all$morts / d.vel.all$victimes * 100
d.vel.VM <- data.frame(victimes = tapply(d.vel.all$victimes, d.vel.all$velocitat, mean)[val.vel],
morts = tapply(d.vel.all$morts, d.vel.all$velocitat, mean)[val.vel],
MpV100 = tapply(d.vel.all$MpV100, d.vel.all$velocitat, mean)[val.vel],
n = tapply(d.vel.all$victimes, d.vel.all$velocitat, length)[val.vel],
nom = factor(nom.vel, levels = nom.vel))
ggplot(data = d.vel.VM, aes(y = n, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Velocitat mà xima") +
scale_y_continuous(trans = 'log10')
ggplot(data = d.vel.VM, aes(y = victimes, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
ggtitle("Victimes") + ylab("Mitja") + xlab("Velocitat mà xima")
ggplot(data = d.vel.VM, aes(y = morts, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
ggtitle("Morts") + ylab("Mitja") + xlab("Velocitat mà xima")
ggplot(data = d.vel.VM, aes(y = MpV100, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
ggtitle("Percentatge de mortalitat de les victimes") + ylab("Percentatge de vÃctimes mortes") + xlab("Velocitat mà xima")
d.vel.pearson.V = cor(d.vel.all$victimes, d.vel.all$velocitat, method = 'pearson')
d.vel.spearman.V = cor(d.vel.all$victimes, d.vel.all$velocitat, method = 'spearman')
d.vel.pearson.M = cor(d.vel.all$morts, d.vel.all$velocitat, method = 'pearson')
d.vel.spearman.M = cor(d.vel.all$morts, d.vel.all$velocitat, method = 'spearman')
d.vel.pearson.MpV100 = cor(d.vel.all$MpV100, d.vel.all$velocitat, method = 'pearson')
d.vel.spearman.MpV100 = cor(d.vel.all$MpV100, d.vel.all$velocitat, method = 'spearman')
n.rep <- 3000
p.vel.pearson.V <- numeric(n.rep)
p.vel.spearman.V <- numeric(n.rep)
p.vel.pearson.M <- numeric(n.rep)
p.vel.spearman.M <-numeric(n.rep)
p.vel.pearson.MpV100 <- numeric(n.rep)
p.vel.spearman.MpV100 <-numeric(n.rep)
for(i in 1:n.rep) {
p.vel <- sample(d.vel.all$velocitat)
p.vel.pearson.V[i] = cor(d.vel.all$victimes, p.vel, method = 'pearson')
p.vel.spearman.V[i] = cor(d.vel.all$victimes, p.vel, method = 'spearman')
p.vel.pearson.M[i] = cor(d.vel.all$morts, p.vel, method = 'pearson')
p.vel.spearman.M[i] = cor(d.vel.all$morts, p.vel, method = 'spearman')
p.vel.pearson.MpV100[i] = cor(d.vel.all$MpV100, p.vel, method = 'pearson')
p.vel.spearman.MpV100[i] = cor(d.vel.all$MpV100, p.vel, method = 'spearman')
}
plot.vel.limit.pearson.V <- c(min(p.vel.pearson.V) * 1.1,
max(c(p.vel.pearson.V, d.vel.pearson.V) * 1.1))
hist(p.vel.pearson.V, xlim = plot.vel.limit.pearson.V, main = 'Correlació entre velocitat i vÃctimes (pearson)')
p.value.vel.pearson.V <- sum(p.vel.pearson.V >= d.vel.pearson.V) / n.rep
lines(rep(d.vel.pearson.V, 2), c(0, n.rep), col = 'red')
plot.vel.limit.spearman.V <- c(min(p.vel.spearman.V) * 1.1,
max(c(p.vel.spearman.V, d.vel.spearman.V) * 1.1))
hist(p.vel.spearman.V, xlim = plot.vel.limit.spearman.V, main = 'Correlació entre velocitat i vÃctimes (spearman)')
p.value.vel.spearman.V <- sum(p.vel.spearman.V >= d.vel.spearman.V) / n.rep
lines(rep(d.vel.spearman.V, 2), c(0, n.rep), col = 'red')
plot.vel.limit.pearson.M <- c(min(p.vel.pearson.M) * 1.1,
max(c(p.vel.pearson.M, d.vel.pearson.M) * 1.1))
hist(p.vel.pearson.M, xlim = plot.vel.limit.pearson.M, main = 'Correlació entre velocitat i morts (pearson)')
p.value.vel.pearson.M <- sum(p.vel.pearson.M >= d.vel.pearson.M) / n.rep
lines(rep(d.vel.pearson.M, 2), c(0, n.rep), col = 'red')
plot.vel.limit.spearman.M <- c(min(p.vel.spearman.M) * 1.1,
max(c(p.vel.spearman.M, d.vel.spearman.M) * 1.1))
hist(p.vel.spearman.M, xlim = plot.vel.limit.spearman.M, main = 'Correlació entre velocitat i morts (spearman)')
p.value.vel.spearman.M <- sum(p.vel.spearman.M >= d.vel.spearman.M) / n.rep
lines(rep(d.vel.spearman.M, 2), c(0, n.rep), col = 'red')
plot.vel.limit.pearson.MpV100 <- c(min(p.vel.pearson.MpV100) * 1.1,
max(c(p.vel.pearson.MpV100, d.vel.pearson.MpV100) * 1.1))
hist(p.vel.pearson.MpV100, xlim = plot.vel.limit.pearson.MpV100, main = 'Correlació entre velocitat i percentatge de vÃctimes mortes (pearson)')
p.value.vel.pearson.MpV100 <- sum(p.vel.pearson.MpV100 >= d.vel.pearson.MpV100) / n.rep
lines(rep(d.vel.pearson.MpV100, 2), c(0, n.rep), col = 'red')
plot.vel.limit.spearman.MpV100 <- c(min(p.vel.spearman.MpV100) * 1.1,
max(c(p.vel.spearman.MpV100, d.vel.spearman.MpV100) * 1.1))
hist(p.vel.spearman.MpV100, xlim = plot.vel.limit.spearman.MpV100, main = 'Correlació entre velocitat i percentatge de vÃctimes mortes (spearman)')
p.value.vel.spearman.MpV100 <- sum(p.vel.spearman.MpV100 >= d.vel.spearman.MpV100) / n.rep
lines(rep(d.vel.spearman.MpV100, 2), c(0, n.rep), col = 'red')
{
show(p.value.vel.pearson.V)
show(p.value.vel.spearman.V)
show(p.value.vel.pearson.M)
show(p.value.vel.spearman.M)
show(p.value.vel.pearson.MpV100)
show(p.value.vel.spearman.MpV100)
}
## [1] 0
## [1] 0.047
## [1] 0
## [1] 0
## [1] 0
## [1] 0
d.vel.fit.V <- lm(victimes~velocitat, data = d.vel.all)
d.vel.fit.M <- lm(morts~velocitat, data = d.vel.all)
d.vel.fit.MpV100 <- lm(MpV100~velocitat, data = d.vel.all)
d.vel.10.VM <- data.frame(victimes = tapply(d.vel.all$victimes, d.vel.all$vel10, mean),
morts = tapply(d.vel.all$morts, d.vel.all$vel10, mean),
MpV100 = tapply(d.vel.all$MpV100, d.vel.all$vel10, mean),
n = tapply(d.vel.all$victimes, d.vel.all$vel10, length),
vel10 = val.vel10)
ggplot(data = d.vel.10.VM, aes(y = victimes, x = vel10)) +
geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
ggtitle("VÃctimes") + ylab("Mitja") + xlab("Velocitat mà xima") +
geom_abline(intercept = summary(d.vel.fit.V)$coefficient[1],
slope = summary(d.vel.fit.V)$coefficient[2], color = 'red')
ggplot(data = d.vel.10.VM, aes(y = morts, x = vel10)) +
geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
ggtitle("Morts") + ylab("Mitja") + xlab("Velocitat mà xima") +
geom_abline(intercept = summary(d.vel.fit.M)$coefficient[1],
slope = summary(d.vel.fit.M)$coefficient[2], color = 'red')
ggplot(data = d.vel.10.VM, aes(y = MpV100, x = vel10)) +
geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
ggtitle("Percentatge de mortalitat de les victimes") + ylab("Percentatge de vÃctimes mortes") + xlab("Velocitat mà xima") +
geom_abline(intercept = summary(d.vel.fit.MpV100)$coefficient[1],
slope = summary(d.vel.fit.MpV100)$coefficient[2], color = 'red')
d.vel.IC.V <- summary(d.vel.fit.V)$coefficient[2] + c(-1.96, 1.96) * summary(d.vel.fit.V)$coefficient[4]
d.vel.IC.M <- summary(d.vel.fit.M)$coefficient[2] + c(-1.96, 1.96) * summary(d.vel.fit.M)$coefficient[4]
d.vel.IC.MpV100 <- summary(d.vel.fit.MpV100)$coefficient[2] + c(-1.96, 1.96) * summary(d.vel.fit.MpV100)$coefficient[4]
{
show(d.vel.IC.V)
show(d.vel.IC.M)
show(d.vel.IC.MpV100)
}
## [1] 0.001247525 0.003019904
## [1] 0.0004982629 0.0011039999
## [1] 0.02220343 0.06208294
d.visib.all <- data.frame(victimes = dataset$F_VICTIMES, morts = dataset$F_MORTS,
boira = dataset$D_BOIRA,
nit = dataset$grupHor,
llum = dataset$D_LLUMINOSITAT)
levels(d.visib.all$boira) <- c("No n'hi ha", 'Si')
levels(d.visib.all$nit) <- c('Dia', 'Nit', 'Dia')
levels(d.visib.all$llum) <- c('Reduida', 'Bona', 'Reduida', 'Reduida', 'Bona', 'Cap', 'NA')
d.visib.mean.Total <- c(tapply(d.visib.all$victimes, d.visib.all$boira, length),
tapply(d.visib.all$victimes, d.visib.all$nit, length),
tapply(d.visib.all$victimes, d.visib.all$llum, length))
d.visib.mean.V <- c(tapply(d.visib.all$victimes, d.visib.all$boira, mean),
tapply(d.visib.all$victimes, d.visib.all$nit, mean),
tapply(d.visib.all$victimes, d.visib.all$llum, mean))
d.visib.mean.M <- c(tapply(d.visib.all$morts, d.visib.all$boira, mean),
tapply(d.visib.all$morts, d.visib.all$nit, mean),
tapply(d.visib.all$morts, d.visib.all$llum, mean))
d.visib.mean.Total <- d.visib.mean.Total[names(d.visib.mean.Total) != 'NA']
d.visib.mean.V <- d.visib.mean.V[names(d.visib.mean.V) != 'NA']
d.visib.mean.M <- d.visib.mean.M[names(d.visib.mean.M) != 'NA']
d.visib.type <- factor(c(rep('boira', sum(levels(d.visib.all$boira) != 'NA')),
rep('nit', sum(levels(d.visib.all$nit) != 'NA')),
rep('iluminació', sum(levels(d.visib.all$llum) != 'NA'))),
levels = c('boira', 'nit', 'iluminació'))
d.visib.class <- factor(names(d.visib.mean.V),
levels = c('Si', "No n'hi ha", 'Dia', 'Nit', 'Bona', 'Reduida', 'Cap'))
d.visib.opt <- levels(d.visib.class)
d.visib.VM <- data.frame(victimes = d.visib.mean.V,
morts = d.visib.mean.M,
n = d.visib.mean.Total,
class = d.visib.class,
type = d.visib.type)
ggplot(d.visib.VM, aes(y = victimes, x = type, fill = class)) +
geom_bar(stat = "identity", position = "dodge") +
ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Factor de visibilitat")
ggplot(d.visib.VM, aes(y = morts, x = type, fill = class)) +
geom_bar(stat = "identity", position = "dodge") +
ggtitle("Morts per accident") + ylab("Mitja") + xlab("Factor de visibilitat")
n.rep <- 5000
bst.visib.mean.V <- data.frame(i = 1:n.rep)
bst.visib.mean.M <- data.frame(i = 1:n.rep)
for(i in 1:n.rep) {
for(j in 1:7) {
bst.visib.mean.V[[d.visib.opt[d.visib.class[j]]]][i] = mean(rpois(d.visib.VM$n[j], d.visib.VM$victimes[j]))
bst.visib.mean.M[[d.visib.opt[d.visib.class[j]]]][i] = mean(rpois(d.visib.VM$n[j], d.visib.VM$morts[j]))
}
}
bst.visib.VM <- data.frame(victimes = c(unlist(lapply(bst.visib.mean.V, mean)))[d.visib.opt],
IC.min.V = c(unlist(lapply(bst.visib.mean.V, quantile, 0.025)))[paste(d.visib.opt, '2.5%', sep = '.')],
IC.max.V = c(unlist(lapply(bst.visib.mean.V, quantile, 0.975)))[paste(d.visib.opt, '97.5%', sep = '.')],
morts = c(unlist(lapply(bst.visib.mean.M, mean)))[d.visib.opt],
IC.min.M = c(unlist(lapply(bst.visib.mean.M, quantile, 0.025)))[paste(d.visib.opt, '2.5%', sep = '.')],
IC.max.M = c(unlist(lapply(bst.visib.mean.M, quantile, 0.975)))[paste(d.visib.opt, '97.5%', sep = '.')],
class = factor(d.visib.opt, levels = d.visib.opt),
type = d.visib.type)
ggplot(data = bst.visib.VM, aes(y = victimes, x = type, fill = class)) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymin = IC.min.V, ymax = IC.max.V), width=.2, position=position_dodge(.9)) +
ggtitle("Victimes") + ylab("Mitja") + xlab("Tipus de carril")
ggplot(data = bst.visib.VM, aes(y = morts, x = type, fill = class)) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymin = IC.min.M, ymax = IC.max.M), width=.2, position=position_dodge(.9)) +
ggtitle("Morts") + ylab("Mitja") + xlab("Tipus de carril")
nom.superficie <- names(sort(tapply(dataset$F_VICTIMES, dataset$D_SUPERFICIE, length), decreasing = TRUE))
nom.superficie <- nom.superficie[nom.superficie != 'Sense especificar']
d.sup.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$D_SUPERFICIE, mean)[nom.superficie],
morts = tapply(dataset$F_MORTS, dataset$D_SUPERFICIE, mean)[nom.superficie],
n = tapply(dataset$F_VICTIMES, dataset$D_SUPERFICIE, length)[nom.superficie],
nom = factor(nom.superficie, levels = nom.superficie),
color = 'red')
ggplot(data = d.sup.VM, aes(x = nom, y = n), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.sup.VM$color, color = 'black') +
ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Temporal") +
scale_y_continuous(trans = 'log10')
ggplot(data = d.sup.VM, aes(x = nom, y = victimes), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.sup.VM$color, color = 'black') +
ggtitle("Histograma de numero d\'accidents") + ylab("Victimes") + xlab("Temporal")
ggplot(data = d.sup.VM, aes(x = nom, y = morts), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.sup.VM$color, color = 'black') +
ggtitle("Histograma de numero d\'accidents") + ylab("Morts") + xlab("Temporal")
d.sup.type.n <- 1:length(nom.superficie)
d.sup.n <- sum(d.sup.VM$n)
d.sup.all <- data.frame(victimes = dataset$F_VICTIMES[dataset$D_SUPERFICIE != 'Sense especificar'],
morts = dataset$F_MORTS[dataset$D_SUPERFICIE != 'Sense especificar'],
superficie = dataset$D_SUPERFICIE[dataset$D_SUPERFICIE != 'Sense especificar'])
n.rep <- 10000
p.sup.V <- matrix(nrow = length(nom.superficie), ncol = n.rep)
p.sup.M <- matrix(nrow = length(nom.superficie), ncol = n.rep)
for(i in 1:n.rep) {
perm <- sample(d.sup.n, replace = T)
p.sup.V[, i] <- resample.means(d.sup.all$victimes, d.sup.VM$n, perm)
p.sup.M[, i] <- resample.means(d.sup.all$morts, d.sup.VM$n, perm)
}
dp.sup.VM <- data.frame(victimes = d.sup.VM$victimes,
morts = d.sup.VM$morts,
nom = factor(nom.superficie, levels = nom.superficie))
for(i in d.sup.type.n) {
dp.sup.VM$IC.min.V[i] = quantile(p.sup.V[i, ], 0.025)
dp.sup.VM$IC.max.V[i] = quantile(p.sup.V[i, ], 0.975)
dp.sup.VM$IC.min.M[i] = quantile(p.sup.M[i, ], 0.025)
dp.sup.VM$IC.max.M[i] = quantile(p.sup.M[i, ], 0.975)
}
ggplot(data = dp.sup.VM, aes(x = nom, y = victimes), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.sup.VM$color, color = 'black') +
geom_errorbar(aes(ymin = IC.min.V, ymax = IC.max.V), width=.2, position=position_dodge(.9)) +
ggtitle("Histograma de numero d\'accidents") + ylab("Victimes") + xlab("Temporal")
ggplot(data = dp.sup.VM, aes(x = nom, y = morts), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.sup.VM$color, color = 'black') +
geom_errorbar(aes(ymin = IC.min.M, ymax = IC.max.M), width=.2, position=position_dodge(.9)) +
ggtitle("Histograma de numero d\'accidents") + ylab("Morts") + xlab("Temporal")